import random
import time

import pandas as pd
import requests
from lxml import etree

# Request headers passed as ``headers=`` to ``requests.get`` in getHTMLText;
# the User-Agent value is a desktop Chrome identification string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.87 '
        'Safari/537.36'
    ),
}


def getHTMLText(start_url):
    """Fetch ``start_url`` and return the response body as text.

    The request is a GET with the module-level ``headers`` and a 30 second
    timeout. Before decoding, the response encoding is overridden with the
    encoding detected from the body (``apparent_encoding``).

    Args:
        start_url: URL of the page to download.

    Returns:
        The decoded page text, or the sentinel string ``'异常'`` when the
        request raises a ``requests`` error or ``raise_for_status()`` rejects
        the response status.
    """
    try:
        r = requests.get(start_url, timeout=30, headers=headers)
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are mapped to the sentinel. A bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit and hide
        # programming errors.
        return '异常'
    else:
        r.encoding = r.apparent_encoding
        return r.text


# Spreadsheet with the names to look up (read from the '疾病名称' column) and
# the URL prefix each name is appended to.
path = r'C:\Users\-\Desktop\平安同义词v1.xls'
url = 'https://baike.baidu.com/item/'

d = pd.read_excel(path)
print(type(d['疾病名称']))

for index, name in enumerate(d['疾病名称']):
    # Printed number is the 0-based position in the column plus 2.
    print(index + 2, name)

    # An empty spreadsheet cell is read as NaN (a float); concatenating it to
    # the URL would raise TypeError and abort the whole run, so skip the row.
    # No request is made, so there is nothing to throttle.
    if pd.isna(name):
        print('skipped: empty name')
        print('---------------------------------------------')
        continue

    start_url = url + str(name)
    html = getHTMLText(start_url)

    # getHTMLText returns the sentinel '异常' when the download failed; do not
    # feed that to the HTML parser as if it were a page.
    tree = None
    if html != '异常':
        try:
            tree = etree.HTML(html)
        except (etree.LxmlError, ValueError):
            # NOTE(review): an empty or unparseable body is expected to end up
            # here or as a ``None`` root depending on the lxml version —
            # both are treated as "no page".
            tree = None

    if tree is None:
        print('skipped: page could not be fetched or parsed:', start_url)
    else:
        # Alias text, if the page has the tip panel; empty string otherwise.
        alias_nodes = tree.xpath('//span[@class="view-tip-panel"]')
        alias = alias_nodes[0].xpath('string(.)') if alias_nodes else ''
        print('alias:', alias)

        # Summary: concatenated text of every <div> under the lemma summary.
        summary = ''.join(
            div.xpath('string(.)')
            for div in tree.xpath('//div[@class="lemma-summary"]/div')
        )

        # Basic-info table as (label, value) pairs.
        # NOTE(review): the two lists are paired by position; a <dd> without a
        # ``title`` attribute is absent from the second list and would shift
        # the pairing — confirm against the page markup.
        content2 = list(zip(tree.xpath('//div[@class="dl-baseinfo"]/dl/dt/text()'),
                            tree.xpath('//div[@class="dl-baseinfo"]/dl/dd/@title')))
        print('简介：', summary)
        print(content2)

    print('---------------------------------------------')
    # Random pause of up to 3 seconds between requests.
    time.sleep(3 * random.random())
# print(d['同义词'].count())
